import requests as r
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
%matplotlib inline
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()
import warnings
# Suppress every Python warning from here on; exceptions still propagate.
warnings.simplefilter('ignore') #ignore the warnings, not the errors


# Load the economic-freedom index CSV (2019, per the file name); the path is
# relative to the current working directory.
df=pd.read_csv('NUMeconomic_freedom_index2019_exploratory.csv')
# Bare expression: renders the frame when it is the last line of a notebook cell.
df
# Blue: developed countries
# Download every HTML table from the French Wikipedia article on developed
# countries (requires network access), then show the first column of the
# first table for reference.
wiki_tables = pd.read_html(
    'https://fr.wikipedia.org/wiki/Pays_d%C3%A9velopp%C3%A9#cite_note-1'
)
wiki_tables[0].iloc[:, 0]
# Countries labelled as "developed", one per line, with a trailing comma as
# the separator. Spellings have to match df['Country Name'] exactly, which is
# why South Korea appears in the dataset's own "Korea, South" form.
# NOTE(review): the remaining spellings were not checked against the CSV here.
developed="""United States,
Canada,
Singapore,
Hong Kong,
Japan,
Korea, South,
Israel,
Taiwan,
Norway,
Switzerland,
Germany,
Denmark,
Netherlands,
Ireland,
Finland,
Sweden,
Liechtenstein,
United Kingdom,
Luxembourg,
France,
Belgium,
Austria,
Slovenia,
Italy,
Spain,
Czech Republic,
Greece,
Estonia,
Andorra,
Cyprus,
Malta,
Slovakia,
Portugal,
Australia,
New Zealand"""
# Remove only the trailing separator comma, so a name that itself contains a
# comma ("Korea, South") survives intact and needs no special-casing later.
developed_list = [line.strip().rstrip(',') for line in developed.split('\n')]
# Binary target column: 1 = developed, 0 = everything else.
df['Developed'] = df['Country Name'].isin(developed_list).astype(int)
# no andorra (original note; presumably Andorra is missing from the dataset - verify)
# thanks Tiago
df

import plotly.express as px

# Interactive scatter plot: GDP per capita on the x axis, the 2019 freedom
# score on the y axis, points coloured by the Developed flag and labelled
# with the country on hover.
fig = px.scatter(
    x=df['GDP per Capita (PPP)'],
    y=df['2019 Score'],
    color=df['Developed'],
    hover_name=df['Country'],
    labels={"x": "GDP per Capita (PPP), $", "y": "Economic Freedom Score"},
)
fig.update_layout(hovermode="x")
fig.show()

# Weighted values

# ![]()
# ![]()
# Columns intended to be left out of the model features: identifiers, names,
# regions, ranks and the overall score.
cols_to_drop = [
    'CountryID',
    'WEBNAME',
    'Region',
    'World Rank',
    'Region Rank',
    'Country',
    '2019 Score',  # Not sure drop it or not
    'Country Name',
]
# Fill missing values with the mean of each numeric column. numeric_only=True
# keeps text columns (e.g. 'Country Name') out of the mean computation, which
# would otherwise raise a TypeError on pandas 2.x; non-numeric columns are
# left untouched because the resulting Series has no entry for them.
df.fillna(df.mean(numeric_only=True), inplace=True)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score, precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Feature matrix: everything except the target and the columns listed in
# cols_to_drop. Those include text columns such as 'Country Name', which the
# classifiers below cannot fit on. errors='ignore' tolerates columns that
# have already been removed from df.
features = df.drop(columns=cols_to_drop + ['Developed'], errors='ignore')
# Hold out one third of the rows for testing; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    df['Developed'],
    test_size=1/3,
    random_state=42,
)

def _fit_and_report(model, X_fit, y_fit, X_eval, y_eval):
    """Fit *model* and report its metrics on the evaluation split.

    The confusion matrix is shown with ``display`` and accuracy, recall,
    precision and F1 are printed, in that order.

    Args:
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        X_fit: Training features.
        y_fit: Training labels.
        X_eval: Features to predict on.
        y_eval: True labels for ``X_eval``.

    Returns:
        Tuple ``(y_pred, conf, acc, rec, pre, f1)`` with the predictions and
        the computed metrics.
    """
    model.fit(X_fit, y_fit)
    y_pred = model.predict(X_eval)
    conf = confusion_matrix(y_eval, y_pred)
    acc = accuracy_score(y_eval, y_pred)
    rec = recall_score(y_eval, y_pred)
    pre = precision_score(y_eval, y_pred)
    f1 = f1_score(y_eval, y_pred)
    display(conf)
    print('Accuracy', acc)
    print('Recall', rec)
    print('Precision', pre)
    print('F1', f1)
    return y_pred, conf, acc, rec, pre, f1


# Run the identical evaluation for each classifier, in the original order.
for model1 in (
    DecisionTreeClassifier(),                         # decision tree
    RandomForestClassifier(),                         # Random forest
    RandomForestClassifier(class_weight='balanced'),  # Random forest balanced
    AdaBoostClassifier(),                             # adaboost
):
    # The module-level names are kept, so after the loop they hold the
    # results of the last model (AdaBoost), exactly as before.
    y_pred1, conf1, acc1, rec1, pre1, f11 = _fit_and_report(
        model1, X_train, y_train, X_test, y_test
    )
# Gradient-boosting classifiers from third-party packages; both xgboost and
# catboost must be installed for these imports to succeed. Neither class is
# used in the code visible below.
from xgboost import XGBClassifier
# The class name was previously misspelled as "CatBoostlassifier", which made
# this import fail (original note: "dont work").
from catboost import CatBoostClassifier
# ![]()
# Four more classifiers evaluated on the same split. Metric calls use
# scikit-learn's (y_true, y_pred) argument order so that confusion-matrix
# rows are the true classes and columns the predicted ones, consistent with
# the tree/ensemble evaluation earlier in the file.

# k-nearest neighbours with k = 5
knn = KNeighborsClassifier(5)
knn.fit(X_train, y_train)
y_pred1 = knn.predict(X_test)
display(confusion_matrix(y_test, y_pred1))
display(accuracy_score(y_test, y_pred1))

# Logistic regression; the iteration cap is raised to 10000
lr = LogisticRegression(max_iter=10000)
lr.fit(X_train, y_train)
y_pred2 = lr.predict(X_test)
display(confusion_matrix(y_test, y_pred2))
display(accuracy_score(y_test, y_pred2))

# Support vector classifier with default settings
sv = SVC()
sv.fit(X_train, y_train)
y_pred3 = sv.predict(X_test)
display(confusion_matrix(y_test, y_pred3))
display(accuracy_score(y_test, y_pred3))

# Gaussian naive Bayes
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred4 = nb.predict(X_test)
display(confusion_matrix(y_test, y_pred4))
display(accuracy_score(y_test, y_pred4))
#wrap
import plotly
# Initialise plotly's offline mode for rendering figures inside the notebook.
plotly.offline.init_notebook_mode()